package dblp.social.importer;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.log4j.Logger;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import dblp.social.exceptions.SodaHibernateException;
import dblp.social.hibernate.ISodaHibernateSession;
import dblp.social.loader.PartFile;

/**
 * This class represents an instance of a dblp Parser, which parses the dblp data formatted as an XML (of known DTD),
 * formats it into a specific model and stores it into a SQL db using hibernate.
 * The actual parsing is done using the standard Java SAX XMLReader which is then customized to our needs
 * by creating our own handler which can tell the XMLReader how to handle, interpret and format the dblp data and eventually store it
 *
 * @see org.xml.sax.XMLReader
 * @see org.xml.sax.helpers.DefaultHandler
 * @author Staffiero
 *
 */
public class DblpParser {

	private static final Logger logger = Logger.getLogger(DblpParser.class);

	/**
	 * Default constructor. The parser keeps no per-parse state: every call to
	 * {@code parseDbpl} creates its own {@link XMLReader}.
	 */
	public DblpParser(){}


	/**
	 * Parses a single dblp part file with a {@code DblpImporterHandler}, sequentially.
	 * A part file is an xml file which represents a portion of the original dblp xml file
	 * and is generated by the pre-parser.
	 * <p>
	 * The session is opened if it is not already open and is always closed before this
	 * method exits, whether it returns normally or throws. A part file already marked as
	 * complete is not parsed again.
	 * <p>
	 * Failures raised while parsing are logged and NOT rethrown: a {@link SAXException}
	 * is logged at debug level, any other exception at error level.
	 * NOTE(review): presumably the handler uses a SAXException to stop the parse on purpose
	 * (checkpoint support lives in DblpImporterHandler, which is not visible here) - confirm.
	 * <p>
	 * This method does not provide support for multi threading; use the overload that
	 * takes a {@code ThreadMonitor} when parsing from a thread.
	 *
	 * @param pf the PartFile object which represents the part file to be parsed
	 * @param session a ISodaHibernateSession used to persist data
	 * @return the value of {@code pf.hasBeenUpdated()} when the method exits
	 * @throws SodaHibernateException declared for the session operations
	 * @throws SAXException if the XML reader cannot be created
	 * @throws IOException if the part file cannot be opened
	 */
	public boolean parseDbpl(PartFile pf,ISodaHibernateSession session) throws SodaHibernateException, SAXException, IOException{
		if (!session.isOpen()) {
			session.open();
		}

		try {
			//if the part file is not marked as complete parses that file
			if (!pf.isComplete()) {
				String fileName = pf.getFile().getName();
				XMLReader reader = XMLReaderFactory.createXMLReader();
				reader.setContentHandler(new DblpImporterHandler(session, pf));
				FileInputStream fis = new FileInputStream(new File(pf.getFile().getAbsolutePath()));

				try {
					logger.debug("Starting the sax-xml parser");
					reader.parse(new InputSource(fis));
					logger.debug("Sax-xml parser returned");
				}
				catch (SAXException se) {
					// best effort: a SAX failure ends the parse of this part file but is not forwarded
					logger.debug("Exception while parsing part file "+fileName+": "+se.getMessage());
				}
				catch (Exception e) {
					// the throwable is handed to the logger so the stack trace is not lost
					logger.error("Exception while parsing part file "+fileName+": "+e.getMessage(), e);
				}
				finally {
					// the stream is released on every exit path
					closeQuietly(fis, fileName);
				}
			}
			logger.debug("File "+pf.getFile().getName()+" completed");
		}
		finally {
			// the session must not stay open when this method fails half way
			if (session.isOpen()) {
				session.close();
			}
		}
		return pf.hasBeenUpdated();
	}

	/**
	 * Parses a single part file with a {@code DblpImporterHandler}.
	 * Use this method if the parse is called within a thread.
	 * <p>
	 * Nothing is done when the part file is already marked as complete. Otherwise the
	 * session is opened if needed and closed after a successful parse. When the parse
	 * fails the session is cleared (not closed) and the failure is forwarded: a
	 * {@link SAXException} as is, any other exception wrapped into a new
	 * {@link SAXException} that carries it as cause. The input stream is closed on
	 * every exit path.
	 *
	 * @param pf the PartFile object which represents the part file to be parsed
	 * @param monitor the threads monitor, handed over to the handler
	 * @param session a ISodaHibernateSession used to persist data
	 * @throws SAXException if the XML reader cannot be created or the parse fails
	 * @throws IOException if the part file cannot be opened
	 * @throws SodaHibernateException declared for the session operations
	 */
	public void parseDbpl(PartFile pf, ThreadMonitor monitor, ISodaHibernateSession session)
		throws SAXException, IOException, SodaHibernateException{

		//if the part file is complete do nothing
		if (pf.isComplete()) {
			return;
		}

		// a local reader (instead of a shared field) keeps concurrent calls independent
		XMLReader reader = XMLReaderFactory.createXMLReader();
		if (!session.isOpen()) {
			session.open();
		}
		//creates the appropriate handler
		reader.setContentHandler(new DblpImporterHandler(session, pf, monitor));
		String fileName = pf.getFile().getName();
		FileInputStream fis = new FileInputStream(new File(pf.getFile().getAbsolutePath()));

		try {
			logger.debug("Starting the sax-xml parser");
			reader.parse(new InputSource(fis));
			logger.debug("Sax-xml parser returned");

			if (session.isOpen()) {
				session.close();
			}
		}
		catch (SAXException se) {
			if (session.isOpen()) {
				session.clear();
			}
			//forwards the exception
			throw se;
		}
		catch (Exception e) {
			logger.error("Exception while parsing part file "+fileName+": "+e.getMessage(), e);
			if (session.isOpen()) {
				session.clear();
			}
			//forwards the exception, keeping the original one as cause
			throw new SAXException(e);
		}
		finally {
			// previously the stream leaked whenever a non-SAX exception was raised
			closeQuietly(fis, fileName);
		}
	}

	/**
	 * Closes the input stream of a part file. A failure to close is logged and not
	 * forwarded, so it can never hide the exception raised by the parse itself.
	 *
	 * @param fis the stream to close
	 * @param fileName name of the part file, used in the log message only
	 */
	private static void closeQuietly(FileInputStream fis, String fileName){
		try {
			fis.close();
		}
		catch (IOException ioe) {
			logger.error("Unable to close part file "+fileName+": "+ioe.getMessage(), ioe);
		}
	}
}
